# +~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~ #  
#
#' @title  Summarize results of classifiers' evaluations on test set
#' @author Hauke Licht
#
# +~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~ #

# setup ----

# load required packages
library(readr)
library(dplyr)
library(purrr)
library(tidyr)

# set paths (all of them are resolved relative to the current working directory)
base_path <- "."
data_path <- file.path(base_path, "data", "output")
res_dir <- file.path(data_path, "classifier_results")

# load files with test set results from different classification approaches ----

# Every entry in `res_dir` whose name contains "_test_results" is read as one
# tab-separated result file.
files <- list.files(res_dir, "_test_results", full.names = TRUE)

# Fail early with a clear message: with no input files the combined table would
# be empty and the downstream cleaning code would abort with an unrelated
# "object not found" error instead.
if (length(files) == 0L) {
  stop("No '_test_results' files found in '", res_dir, "'.", call. = FALSE)
}

# The approach label is the part of the file name before the first underscore;
# it is attached to each file's rows through the `approach` ID column.
names(files) <- sub("_.+$", "", basename(files))
res <- map_dfr(files, read_tsv, .id = "approach")

# clean the TSV (some columns named differently)
res_clean <- res %>%
  # the row label lives in `what` or, where that is missing, in `Unnamed: 0`
  mutate(what = coalesce(what, `Unnamed: 0`)) %>%
  # rows labelled "accuracy" are relabelled as "micro avg"
  mutate(what = replace(what, what %in% "accuracy", "micro avg")) %>%
  select(approach, model, what, precision, recall, `f1-score`) %>%
  # drop class prevalence-weighted metrics
  filter(!grepl("weighted", what)) %>%
  # `model` is split at "_" into learner and features; `features` is NA when
  # `model` contains no underscore
  separate(model, c("learner", "features"), sep = "_", fill = "right") %>%
  # human-readable feature labels (uses the original `approach` values, so this
  # step has to come before `approach` is recoded)
  mutate(
    features = case_when(
      approach == "bow+mt" ~ "Bag-of-words",
      approach == "xlmt" ~ "XLM-T",
      features == "lasers" ~ "LASER embeddings",
      features == "xlmrs" ~ "XLM-R embeddings",
      TRUE ~ features
    )
  ) %>%
  # shorter approach labels; all other values are kept as they are
  mutate(
    approach = case_when(
      approach == "xlmt" ~ "transformer",
      approach == "bow+mt" ~ "bow",
      TRUE ~ approach
    )
  )


# Write the cleaned results of all classifiers to disk; a file that already
# exists at this path is left untouched.
fp <- file.path(res_dir, "all_classifier_results.tab")
if (!file.exists(fp)) {
  write_tsv(res_clean, fp)
}

# get best results by approach ----

# Per approach, pick the classifier(s) with the highest F1 score among the rows
# labelled "pos" (ties are all kept), then pull in every metric row of the
# selected classifiers from `res_clean`.
best_classifiers <- res_clean %>%
  # ignore missing F1 scores: a single NA would turn the group maximum into NA
  # and silently drop the whole approach from the result
  filter(what == "pos", !is.na(`f1-score`)) %>%
  group_by(approach) %>%
  filter(`f1-score` == max(`f1-score`)) %>%
  ungroup() %>%
  distinct(approach, learner, features) %>%
  # explicit keys: the same columns a natural join would pick, but without the
  # "Joining with `by = ...`" message and robust to future column additions
  left_join(res_clean, by = c("approach", "learner", "features"))
  
# Write the results of the best classifier(s) per approach to disk; a file that
# already exists at this path is left untouched.
fp <- file.path(res_dir, "best_classifier_results.tab")
if (!file.exists(fp)) {
  write_tsv(best_classifiers, fp)
}
